{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# treedepth,childweight调参"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from xgboost import XGBClassifier\n",
    "import xgboost as xgb\n",
    "\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "\n",
    "from sklearn.metrics import log_loss\n",
    "\n",
    "from matplotlib import pyplot\n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = 'Desktop/RentListingInquries/code/data/'\n",
    "test = pd.read_csv(path+'RentListingInquries_FE_test.csv')\n",
    "train = pd.read_csv(path+'RentListingInquries_FE_train.csv')\n",
    "y_train = train['interest_level']\n",
    "x_train = train.drop('interest_level',axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.59930, std: 0.00290, params: {'max_depth': 3, 'min_child_weight': 1},\n",
       "  mean: -0.59957, std: 0.00295, params: {'max_depth': 3, 'min_child_weight': 3},\n",
       "  mean: -0.59950, std: 0.00322, params: {'max_depth': 3, 'min_child_weight': 5},\n",
       "  mean: -0.58808, std: 0.00428, params: {'max_depth': 5, 'min_child_weight': 1},\n",
       "  mean: -0.58892, std: 0.00402, params: {'max_depth': 5, 'min_child_weight': 3},\n",
       "  mean: -0.58844, std: 0.00341, params: {'max_depth': 5, 'min_child_weight': 5},\n",
       "  mean: -0.59105, std: 0.00305, params: {'max_depth': 7, 'min_child_weight': 1},\n",
       "  mean: -0.59034, std: 0.00361, params: {'max_depth': 7, 'min_child_weight': 3},\n",
       "  mean: -0.58981, std: 0.00391, params: {'max_depth': 7, 'min_child_weight': 5},\n",
       "  mean: -0.60867, std: 0.00392, params: {'max_depth': 9, 'min_child_weight': 1},\n",
       "  mean: -0.60332, std: 0.00536, params: {'max_depth': 9, 'min_child_weight': 3},\n",
       "  mean: -0.59881, std: 0.00378, params: {'max_depth': 9, 'min_child_weight': 5}],\n",
       " {'max_depth': 5, 'min_child_weight': 1},\n",
       " -0.5880773269818923)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#这部分采用sklearn的xgb，一开始，先两个参数一起调\n",
    "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=3)\n",
    "max_depth = range(3,10,2)\n",
    "min_child_weight = range(1,6,2)\n",
    "param_test2 = dict(max_depth=max_depth, min_child_weight=min_child_weight)\n",
    "xgb2 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=261,  \n",
    "        max_depth=5,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.3,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch2 = GridSearchCV(xgb2, param_grid = param_test2, scoring='neg_log_loss',n_jobs=-1, cv=kfold)\n",
    "gsearch2.fit(x_train , y_train)\n",
    "\n",
    "gsearch2.grid_scores_, gsearch2.best_params_,     gsearch2.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "综合两个参数，最好的结果是：'max_depth': 5, 'min_child_weight': 1。再整体观察，发现树的深度在5左右，因此再细调。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.59930, std: 0.00290, params: {'max_depth': 3},\n",
       "  mean: -0.59139, std: 0.00318, params: {'max_depth': 4},\n",
       "  mean: -0.58808, std: 0.00428, params: {'max_depth': 5},\n",
       "  mean: -0.58947, std: 0.00313, params: {'max_depth': 6}],\n",
       " {'max_depth': 5},\n",
       " -0.5880773269818923)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max_depth = [3,4,5,6]\n",
    "param_test2 = dict(max_depth=max_depth)\n",
    "xgb2 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=261,  #第一轮参数调整得到的n_estimators最优值\n",
    "        max_depth=5,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.3,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch2 = GridSearchCV(xgb2, param_grid = param_test2, scoring='neg_log_loss',n_jobs=-1, cv=kfold)\n",
    "gsearch2.fit(x_train , y_train)\n",
    "\n",
    "gsearch2.grid_scores_, gsearch2.best_params_,     gsearch2.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "因此，基本确定最佳的treedepth参数是5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.58808, std: 0.00428, params: {'min_child_weight': 1},\n",
       "  mean: -0.58885, std: 0.00297, params: {'min_child_weight': 2},\n",
       "  mean: -0.58892, std: 0.00402, params: {'min_child_weight': 3},\n",
       "  mean: -0.58882, std: 0.00356, params: {'min_child_weight': 4},\n",
       "  mean: -0.58844, std: 0.00341, params: {'min_child_weight': 5}],\n",
       " {'min_child_weight': 1},\n",
       " -0.5880773269818923)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#固定treedepth，再调整min_child_weight\n",
    "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=3)\n",
    "min_child_weight = range(1,6)\n",
    "param_test2 = dict(min_child_weight=min_child_weight)\n",
    "xgb2 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=261,  \n",
    "        max_depth=5,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.3,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch2 = GridSearchCV(xgb2, param_grid = param_test2, scoring='neg_log_loss',n_jobs=-1, cv=kfold)\n",
    "gsearch2.fit(x_train , y_train)\n",
    "\n",
    "gsearch2.grid_scores_, gsearch2.best_params_,     gsearch2.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看到，细化后的参数与之前的结果相同，这两个参数的最佳组合是'max_depth': 5, 'min_child_weight': 1"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
